/* All tables and figures for JPubE R&R. 2025 */
clear all
set scheme s1color

cd "/Users/aguo/Dropbox/newemployer_ui/replication/"

{
/****************************/
/*	 PART I: 	Data build  */
/****************************/

/* (1) MAIN SAMPLE: bds_analysis_sample_2025 */
** State-by-Sector-by-Firm Age file downloaded from https://www.census.gov/data/datasets/time-series/econ/bds/bds-datasets.html
import delimited data/bds2020_st_sec_fa.csv, clear
rename st state
replace firms = "0" if firms=="(D)" | firms=="(X)"
replace emp = "0" if emp=="(D)" | emp=="(X)"
gen n2 = substr(sector,1,2)
destring n2 firms emp, replace

bysort state year sector: egen wgt = sum(firms)
sort state sector fage year
by state sector: gen lag_wgt = wgt[_n-1]
keep if fage=="a) 0"
drop if wgt==0
replace lag_wgt = 1 if lag_wgt==0
gen entryrate = 100*firms/lag_wgt 
replace entryrate = 100 if entryrate>100 & entryrate!=.

gen logenter = log(1+firms) // Note that BDS censors small values (we don't observe firm counts below 3)
gen logenter_noplus1 = log(firms) // For robustness check
gen log_emp = log(emp)

merge 1:1 state year sector using data/newemployer, keepusing(newrate maxrate base personalinc corporate minwage benefit stunemployment)
keep if _merge==3
drop _merge

gen logbase = log(base)
gen tax = (newrate*base/100)
gen logtax = log(newrate*base/100)
gen logmax = log(maxrate*base/100)
gen log_benefit = log(benefit)
gen index = inlist(state,2,15,16,19,27,30,32,34,35,37,38,40,41,44,49,53,56)
gen reserve = inlist(state,4,5,6,8,11,13,15,16,19,21,22,23,24,26,30,31,32,33,34,35,37,38,39,40,44,46,47,53,55)
	replace reserve = . if inlist(state,2) // Alaska is a separate type than reserve ratio or benefit ratio ("payroll variation") - so, remove from consideration here
	
xtile ubin = stunemploymentrate, nquant(20)
xtile bbin = benefit, nquant(10)
egen cluster = concat(n2 state)

// SET THE SAMPLE
qui reghdfe logenter newrate logbase personalinc corporate log_benefit minwage stunemploymentrate  [aw=wgt], a(n2#year n2#state ubin bbin) cluster(cluster)
 
keep if e(sample)
save bds_analysis_sample_2025.dta, replace

// ADD EXIT AND AGE 1 EMPLOYMENT
import delimited data/bds2020_st_sec_fa.csv, clear
rename st state
foreach x of varlist emp firms firmdeath_firms {
	replace `x' = "" if `x'=="(D)" | `x'=="(X)"
}
gen n2 = substr(sector,1,2)
destring n2 emp firms firmdeath_firms, replace
keep if fage=="b) 1"
	
gen exit_age1 = 100*firmdeath_firms/firms
gen emp_perfirm = emp/firms
keep state year sector exit emp_perfirm
tempfile temp	
save `temp', replace 
	
use bds_analysis_sample_2025.dta, clear
merge 1:1 state year sector using `temp'
keep if _merge==3
drop _merge 
save bds_analysis_sample_2025, replace
	

//////////////////////////////////////////////
/* (2) BFS SAMPLE: bfs_analysis_sample_2025 */
//////////////////////////////////////////////
import delimited data/bfs_monthly.csv, clear

drop if inlist(geo,"US","NO","MW","SO","WE")
rename geo State_Abbrev

// merge in state abbreviations
merge m:1 State_Abbrev using data/state_abbreviations
	keep if _merge==3
	drop _merge
	
rename fips state

keep if inlist(series,"BA_BA","BA_CBA","BA_HBA","BA_WBA")
	/* per documentations:
	
			• 	Business Applications (BA): The core business applications series that correspond to a subset of
			all applications for an EIN. This series includes all applications for an EIN, except for applications
			for tax liens, estates, and trusts, applications outside of the 50 states and the District of
			Columbia or those with no state-county geocodes, applications with certain NAICS codes in
			sector 11 (agriculture, forestry, fishing and hunting) or 92 (public administration) that have low
			transition rates, and applications in certain industries (i.e. private households, certain financial
			services, civic and social organizations).
			• High-Propensity Business Applications (HBA): Business Applications (BA) that have a highpropensity of turning into businesses with payroll. The identification of high-propensity
			applications is based on the characteristics of applications revealed on the IRS Form SS-4 that
			are associated with a high rate of business formation. High-propensity applications include
			applications: (a) from a corporate entity, (b) that indicate they are hiring employees, (c) that
			provide a first wages-paid date (planned wages); or (d) that have a NAICS industry code in
			accommodation and food services (72) or in portions of construction (237, 238), manufacturing
			(312, 321, 322, 332), retail (44, 452), professional, scientific, and technical services (5411,
			5413), educational services (6111), and health care (621, 623).
			• Business Applications with Planned Wages (WBA): High-Propensity Business Applications
			(HBA) that indicate a first wages-paid date on the IRS Form SS-4. The indication of a wages-paid
			date is associated with a high likelihood of transitioning into a business with payroll.
			• Business Applications from Corporations (CBA): High-Propensity Business Applications (HBA)
			from a corporation or personal service corporation, based on the legal form of organization
			stated in the IRS Form SS-4. Similar to the WBA series, this series is important primarily because
			it consists of a set of applications that have a high rate of transitioning into businesses with
			payroll.
			
			*/
			
keep if sa == "U" //Not seasonally adjusted
// get annual counts, then reshape 
foreach m in jan feb mar apr may jun jul aug sep oct nov dec{
	destring `m', replace
}
gen annual = jan + feb + mar + apr + may + jun + jul + aug + sep + oct + nov + dec

keep series state year annual 
reshape wide annual, i(state year) j(series) string

rename annual* *
drop if missing(BA_BA, BA_CBA, BA_HBA ,BA_WBA)
		
// logs
gen log_apps = log(BA_BA)
gen log_apps_likely_payroll = log(BA_HBA)

// Merge in tax data (note: BFS 2005-2022)
tempfile temp 
save `temp', replace

use data/newemployer, clear 

	collapse (mean) newrate (median) med = newrate, ///
		by(state year maxrate base personalinc corporate minwage benefit stunemployment)
	drop med 
	
	keep if year>=2005
	merge 1:1 state year using `temp', gen(_merge_ui)

		keep if _merge_ui == 3
		drop _merge_ui

gen logbase = log(base)
gen logmax = log(maxrate*base/100)	
gen logtax = log(newrate*base/100)
gen log_benefit = log(benefit)
// Merge in weights - total number of employment firms in state (all ages); also get new firm count as benchmark

tempfile temp 
save `temp', replace 

// Merge in BDS data for weights
import delimited data/bds2020_st_sec_fa.csv, clear
	rename st state
	replace firms = "0" if firms=="(D)" | firms=="(X)"
	replace emp = "0" if emp=="(D)" | emp=="(X)"
	gen n2 = substr(sector,1,2)
	destring n2 firms emp, replace

	bysort state year: egen wgt = sum(firms)

	keep if fage=="b) 1"
	drop if wgt==0

	collapse (sum) firms=firms, by(state year wgt)
	gen logenter = log(firms) 
	
	merge 1:1 state year using `temp', gen(_merge_wgt)
	
	keep if _merge_wgt ==3
	drop _merge_wgt


xtile ubin = stunemploymentrate , nquant(20)
xtile bbin = benefit, nquant(10)
egen cluster = concat(state) 

save bfs_analysis_sample_2025, replace 


//////////////////////////////////////////////////////////
/* (3) BORDER COUNTY SAMPLE: bordercounties_analysis_sample_2025 */
//////////////////////////////////////////////////////////
import delimited data/bds2020_st_sec_fa.csv, clear
rename st state
replace firms = "0" if firms=="(D)" | firms=="(X)"
replace emp = "0" if emp=="(D)" | emp=="(X)"
gen n2 = substr(sector,1,2)
destring n2 firms emp, replace

collapse (sum) firms, by(state year sector)

merge 1:1 state year sector using data/newemployer, keepusing(newrate maxrate base personalinc corporate minwage benefit stunemployment)
keep if _merge==3
drop _merge

bysort state year: egen tot_firms = sum(firms)
gen relwgt = firms/tot_firms

gen wgt_newrate = relwgt*newrate
collapse (sum) wgt_newrate (mean) newrate maxrate base personalinc corporate minwage benefit stunemployment (sd) sd_newrate=newrate, by(state year) // note: only newrate varied by sector 

tempfile temp 
save `temp', replace

// BDS County-level data 
use data/bds_county_data.dta, clear

drop STATE COUNTY
rename *, lower
drop if fage == "001" // pooled ; note, with age breakdown, only get pooled NAICS

destring firm emp year, replace
gen fips = state + county 
destring state fips, replace

bysort fips year: egen wgt = sum(firm)
sort fips fage year
by fips: gen lag_wgt = wgt[_n-1]

keep if fage=="010" // NOTE: unlike bds_entry_analysis, here we can only look at firm age 0 or firm age 1-5, etc. (instead of firm age 1); here we take firm age 0 
drop if wgt==0

bysort fips year: egen wgt_new = sum(firm)
sort fips year
by fips: gen lag_wgt_new = wgt_new[_n-1]

replace lag_wgt = 1 if lag_wgt==0
gen entryrate = 100*firm/lag_wgt 
su entryrate, d 
gen logenter = log(firm+1)
gen log_emp_perfirm = log(emp/firm)
gen log_emp = log(emp)

drop county 
rename fips county

merge m:1 state year using `temp'
	keep if _merge ==3
	drop _merge

keep if year>=2003 & year<=2014
gen log_benefit = log(benefit)
gen logbase = log(base)
gen logtax = log(base*wgt_newrate/100)
gen logtax_simple = log(base*newrate/100)

xtile ubin = stunemploymentrate , nquant(20)
xtile bbin = benefit, nquant(10)
egen cluster = concat(county) // here, county

save bordercounties_analysis_sample_2025, replace

////////////////////////////////////////////////////////
/* (4) SURVIVAL SAMPLE: survival_analysis_sample_2025 */
////////////////////////////////////////////////////////
import delimited data/bds2020_st_sec_fa.csv, clear
rename st state
foreach x of varlist emp firms estabs firmdeath_estabs firmdeath_firms {
	replace `x' = "" if `x'=="(D)" | `x'=="(X)"
}
gen n2 = substr(sector,1,2)
destring n2 emp firms estabs firmdeath_firms firmdeath_estabs, replace

bysort state year sector: egen wgt = sum(firms)
sort state sector fage year
by state sector: gen lag_wgt = wgt[_n-1]
gen exitrate = 100*firmdeath_firms/firms

replace fage = substr(fage,4,2)
keep if inlist(fage,"1","2","3","4","5") 
destring fage, replace

merge m:1 state year sector using data/newemployer, keepusing(newrate maxrate base personal corporate minwage benefit stunemployment avg_rate)
keep if _merge==3
drop _merge

gen logtax = log(newrate*base/100)
gen logmax = log(maxrate*base/100)
gen logavg = log(avg_rate*base) // Assigned missing UI rates the maximum (occurs for Mining)
gen log_benefit = log(benefit)
gen entryyear = year - fage
egen cohort = concat(state n2 entryyear)

xtile ubin = stunemploymentrate , nquant(20)
xtile bbin = benefit, nquant(10)
egen cluster = concat(n2 state)

qui reghdfe exitrate logtax logmax personalinc corporate log_benefit minwage stunemploymentrate [aw=wgt], a(n2#year n2#state ubin bbin cohort fage) cluster(cluster)
keep if e(sample)==1

foreach x of varlist logtax logmax logavg corporate personalinc log_benefit minwage stunemployment {
	su `x'
	replace `x' = `x'/r(sd)
}

forval i = 1/5 {
	gen logtaxXage`i' = logtax*(fage==`i')
	gen logmaxXage`i' = logmax*(fage==`i')
	gen corpXage`i' = corporate*(fage==`i')
	gen personalXage`i' = personalinc*(fage==`i')
	gen benefitXage`i' = benefit*(fage==`i')
	gen mwageXage`i' = minwage*(fage==`i')
	gen URXage`i' = stunemploymentrate*(fage==`i')
}

save survival_analysis_sample_2025, replace
}


{
/****************************/
/*	PART II: 	Tables      */
/****************************/

/*****************************/
/* Table 1: Summary statistics (BDS) */
/*****************************/

use bds_analysis_sample_2025, clear
global mx1 firms logenter entryrate tax logtax logbase newrate base personalinc corporate log_benefit minwage stunemploymentrate

	label var firms "Number of new firms"
	label var logenter "Log(number of new firms)"
	label var entryrate "Entry rate"
	label var tax "New rate*base"
	label var logtax "Log(new rate*base)"
	label var logbase "Log(base)"
	label var newrate "New rate"
	label var base "Base (\textdollar)"
	label var personalinc "Personal inc. tax rate"
	label var corporate "Corporate tax rate"
	label var log_benefit "Log(UI benefits) (\textdollar)"
	label var minwage "Minimum wage (\textdollar)"
	label var stunemploymentrate "State unemp. rate"

eststo clear 
estpost tabstat $mx1 , c(stat) stat(sum mean sd p50 min max n)

	esttab using "table_1_unweighted.tex", replace ////
	 cells("mean(fmt(%6.2fc)) sd(fmt(%6.2fc)) p50(fmt(%6.2fc))  min max count(fmt(%6.0fc))")  nonumber ///
		fragment label gaps nobaselevels   nomtitle  nonote noobs  ///
	  collabels("Mean" "Std Dev" "Median" "Min" "Max" "N") postfoot(\bottomrule)

eststo clear 
estpost tabstat $mx1 [aw=wgt] , c(stat) stat(sum mean sd p50 min max n)

	esttab using "table_1_weighted.tex", replace ////
	 cells("mean(fmt(%6.2fc)) sd(fmt(%6.2fc)) p50(fmt(%6.2fc))  min max count(fmt(%6.0fc))") nonumber ///
		fragment label gaps nobaselevels   nomtitle  nonote noobs  ///
	  collabels("Mean" "Std Dev" "Median" "Min" "Max" "N") postfoot(\bottomrule)
	  
	// In Latex combine into one table; weighted and unweighted mean/std; unweighted p50, min, max; no N 

/*****************************/
/* Table 2: Higher UI taxes predict lower firm entry */
/*****************************/

lab var logenter "Log(New Firms)"
lab var entryrate "Entry Rate"

est clear
foreach x of varlist logenter entryrate {
	eststo: reghdfe `x' logtax [aw=wgt], a(n2#year n2#state) cluster(cluster)
	summ `x' [aw=wgt] if e(sample)==1
	estadd scalar ymean = r(mean)
	estadd local n2_state "X"
	estadd local n2_year "X"
	estadd local weighting "X" 
	eststo: reghdfe `x' logtax personalinc corporate log_benefit minwage stunemploymentrate [aw=wgt], a(n2#year n2#state ubin bbin) cluster(cluster)
	summ `x' [aw=wgt] if e(sample)==1
	estadd scalar ymean = r(mean)
	estadd local n2_year "X"
	estadd local n2_state "X"
	estadd local bins "X"
	estadd local weighting "X"
	eststo: reghdfe `x' newrate logbase personalinc corporate log_benefit minwage stunemploymentrate [aw=wgt], a(n2#year n2#state ubin bbin) cluster(cluster)
	summ `x' [aw=wgt] if e(sample)==1
	estadd scalar ymean = r(mean)
	estadd local n2_state "X"
	estadd local n2_year "X"
	estadd local bins "X"
	estadd local weighting "X"	
	
}
esttab using table_2.tex, replace b(%9.3f) se  ///
	coeflabel(logtax "Log(new rate*base)" logbase "Log(base)" newrate "New rate" personalinc "Personal inc. tax rate" corporate "Corporate tax rate" log_benefit "Log(UI benefits) (\textdollar)" minwage "Minimum wage (\textdollar)" stunemploymentrate "State unemp. rate") star(* 0.10 ** 0.05 *** 0.01) drop(_cons)  ///
	stats(r2 ymean n2_year n2_state bins weighting N, label("R$^2$" "Mean Outcome" "Sector-Year FEs" "Sector-State FEs" "Unemp. and Benefit Bin FEs""Firm Weights" "N") fmt(%9.3f %9.3f 0 0 0 0 %9.0fc)) nomtitles unstack nonote noobs fragment label  nobaselevels   nomtitle postfoot(\bottomrule) order(logtax logbase newrate) 

/*****************************/
/* Table 3: Higher UI taxes predict lower firm entry, across state borders (county-level) */
/*****************************/
est clear
foreach x in logenter entryrate{
	
use bordercounties_analysis_sample_2025, clear

// REG 1: All counties, year and county fixed effects
	eststo m1_`x': reghdfe `x' logbase personalinc corporate log_benefit minwage stunemploymentrate [aw=wgt],  a(year county bbin ubin) cluster(cluster)	
	summ `x' [aw=wgt] if e(sample)==1
	estadd scalar ymean = r(mean)
	estadd local samp "All Counties"
	estadd local weighting "X"
	estadd local yfes "X"
	estadd local cfes "X"
	estadd local bins "X"
		
// Merge in Border County Pairs; keep borders 
merge m:1 county using data/bordercounties

	tab _merge, sum(firm)
		
	keep if _merge == 3
	drop _merge
		
// REG 2: Border counties, year and county fixed effects; pre row duplication	
	eststo m2_`x': reghdfe `x' logbase personalinc corporate log_benefit minwage stunemploymentrate [aw=wgt],  a(year county bbin ubin) cluster(cluster)	
	summ `x' [aw=wgt] if e(sample)==1
	estadd scalar ymean = r(mean)
	estadd local samp "Border Counties"
	estadd local weighting "X"
	estadd local yfes "X"
	estadd local cfes "X"
	estadd local bins "X"

// reshape; duplicate rows if county has multiple pairs 
reshape long countypair, i(county year) j(countypaircount)
drop if countypair==.	
	
// Weights: for each border county pair-year, adjust weights to be equal to the sum of their weights "in" that pair; 
bysort year countypair: egen count_pairs = count(wgt)
drop if count_pairs < 2
bysort year countypair: egen sum_wgt_pair = sum(wgt)
gen loo_wgt_pair = sum_wgt_pair - wgt 
bysort year county: egen sum_loo = sum(loo_wgt_pair)
gen own_add = wgt * (loo_wgt_pair / sum_loo)	
bysort year countypair: egen wgt_adj = sum(own_add)	
replace wgt_adj = 0 if count_pairs < 2
	
// REG 3: Border counties, year-county pair and county fixed effects; post row duplication, with weights
// without fes:

	eststo m3_`x': reghdfe `x' logbase personalinc corporate log_benefit minwage stunemploymentrate [aw=wgt_adj],  a(year county bbin ubin) cluster(cluster)
	summ `x' [aw=wgt_adj] if e(sample)==1
	estadd scalar ymean = r(mean)
	estadd local samp "Border County Pairs" 
	//estadd local weighting "Firms in County Pair"
	estadd local weighting2 "X"
	estadd local yfes "X"
	estadd local cfes "X"
	estadd local bins "X"
	
// with fes

	eststo m4_`x': reghdfe `x' logbase personalinc corporate log_benefit minwage stunemploymentrate [aw=wgt_adj],  a(year#countypair county bbin ubin) cluster(cluster)
	summ `x' [aw=wgt_adj] if e(sample)==1
	estadd scalar ymean = r(mean)
	estadd local samp "Border County Pairs" 
	//estadd local weighting "Firms in County Pair"
	estadd local weighting2 "X"
	estadd local ycpfes "X"
	estadd local cfes "X"
	estadd local bins "X"

}

esttab m1_logenter m2_logenter m1_entryrate m2_entryrate using appendix_table_a3.tex, replace b(%9.3f) se  ///
	coeflabel(logtax "Log(new rate*base)" logbase "Log(base)" wgt_newrate "New rate" personalinc "Personal inc. tax rate" corporate "Corporate tax rate" log_benefit "Log(UI benefits) (\textdollar)" minwage "Minimum wage (\textdollar)" stunemploymentrate "State unemp. rate") star(* 0.10 ** 0.05 *** 0.01)   ///
	stats(r2 ymean yfes cfes N, label("R$^2$" "Mean Outcome" "Year FEs"  "Unemp. and Benefit Bin FEs" "County FEs" "N") ///
	fmt(%9.3f %9.3f 0 0 0/*0 0 0*/ %9.0fc)) nomtitles unstack nonote noobs fragment label  nobaselevels nomtitle postfoot(\bottomrule) order(logbase) keep(logbase)		
	
esttab m3_logenter m4_logenter m3_entryrate m4_entryrate using table_3.tex, replace b(%9.3f) se  ///
	coeflabel(logtax "Log(new rate*base)" logbase "Log(base)" wgt_newrate "New rate" personalinc "Personal inc. tax rate" corporate "Corporate tax rate" log_benefit "Log(UI benefits) (\textdollar)" minwage "Minimum wage (\textdollar)" stunemploymentrate "State unemp. rate") star(* 0.10 ** 0.05 *** 0.01)   ///
	stats(r2 ymean yfes cfes bins ycpfes  N, label("R$^2$" "Mean Outcome" "Year FEs" "County FEs"  "Unemp. and Benefit Bin FEs" "Year-County Pair FEs" "N") ///
	fmt(%9.3f %9.3f 0 0 0 0 /* 0 0*/ %9.0fc)) nomtitles unstack nonote noobs fragment label  nobaselevels nomtitle postfoot(\bottomrule) order(logbase) keep(logbase)	

/*****************************/
/* Table 4: Higher UI taxes predict usage of contract labor in construction */
/*****************************/

/* The CCN and LBD datasets are confidential, and can only be accessed through a Census RDC */

/*
use ccn2012, clear
drop if lbdnum==""
drop if empq1==0 & empq2==0 & empq3==0 & empq4==0
gen meanemp = (empq1+empq2+empq3+empq4)/4
rename rcptot tvs
rename stfips fipsst
keep fipsst lbdnum pchtemp payann meanemp tvs empq1 empq2 empq3 empq4
gen yr = 2012
merge 1:1 lbdnum yr using lbd2012, keepusing(emp firstyear firmid mu naics pay)
keep if _merge==3
drop _merge

append using ccn2007
drop if lbdnum==""
drop if empq1==0 & empq2==0 & empq3==0 & empq4==0
destring fipsst, replace
drop if fipsst > 56
replace pchtemp = ctemp if pchtemp==.
replace yr = 2007 if yr==.
replace payann = payancw + payanoc if payann==.
replace meanemp = (empq1+empq2+empq3+empq4)/4 if meanemp==.
merge 1:1 lbdnum yr using lbd2007, keepusing(emp firstyear firmid mu naics pay) update replace
drop if _merge==2 | (_merge==1 & yr==2007)
drop _merge
rename fipsst state
keep state lbdnum firmid pchtemp yr payann meanemp emp firstyear empq1 empq2 empq3 empq4 mu tvs naics

gen n2 = 23
gen n3 = substr(naics,1,3)
drop if n3!="236" & n3!="237" & n3!="238"
destring n3, replace

//Merge in tax information
gen year = yr
merge m:1 n2 state year using newemployers 
keep if _merge==3
drop _merge

//Limit to new firms
bysort firmid state year: egen entry = min(firstyear)
keep if (yr==2012 & entry>=2010) | (yr==2007 & entry>=2005) 
drop if (yr==2012 & entry==2012) | (yr==2007 & entry==2007)

drop if meanemp<1 | payann<=0
gen anytemp = 100*(pchtemp>0)
gen logtax = log(newrate*base/100) 
gen log_emp = log(meanemp)
gen logpay = log(payann)
gen log_benefit = log(benefit)

//Upweight by match rate
merge 1:1 lbdnum yr using ccnweights
keep if _merge==3
drop _merge
gen empweight = meanemp
gen matchweight = weight*meanemp

// Generate controls
global tcs corporate personalinc minwage log_benefit urate 
gen logtax_2007 = logtax*(year==2007)
gen logtax_2012 = logtax*(year==2012)
xtile urate_bins = urate, nquant(20)
xtile benefit_bins = benefit, nquant(10)

est clear 
local y = "anytemp"
	eststo: reghdfe `y' logtax logpay $tcs [aw=empweight], a(urate_bins benefit_bins n3#state n3#year) cluster(state)
		estadd local Obs = round(e(N), 500)
		su `y' [aw=empweight] if e(sample)==1
		estadd scalar ymean = r(mean)
		estadd scalar y_sd = r(sd)
		su logtax [aw=empweight] if e(sample)==1
		estadd scalar xmean = r(mean)
		estadd scalar x_sd = r(sd)
		estadd local n3_state "x"
		estadd local n3_year "x"
		estadd local we "Emp"
	eststo: reghdfe `y' logtax_2007 logtax_2012 logpay $tcs [aw=empweight], a(urate_bins benefit_bins n3#state n3#year) cluster(state)
		estadd local Obs = round(e(N), 500)
		estadd local n3_state "x"
		estadd local n3_year "x"
		estadd local we "Emp"
	eststo: reghdfe `y' logtax logpay $tcs [aw=matchweight], a(urate_bins benefit_bins n3#state n3#year) cluster(state)
		estadd local Obs = round(e(N), 500)
		su `y' [aw=matchweight] if e(sample)==1
		estadd scalar ymean = r(mean)
		estadd scalar y_sd = r(sd)
		su logtax [aw=matchweight] if e(sample)==1
		estadd scalar xmean = r(mean)
		estadd scalar x_sd = r(sd)
		estadd local n3_state "x"
		estadd local n3_year "x"
		estadd local we "Emp+Match"
	eststo: reghdfe `y' logtax_2007 logtax_2012 logpay $tcs [aw=matchweight], a(urate_bins benefit_bins n3#state n3#year) cluster(state)
		estadd local Obs = round(e(N), 500)
		estadd local n3_state "x"
		estadd local n3_year "x"
		estadd local we "Emp+Match"
		
esttab using table4.csv, replace r2 /// 
	drop($tcs _cons) se stats(Obs n3_state n3_year we, label("N" "N3-by-StateFEs" "N3-by-YearFEs" "Weight")) star(* 0.10 ** 0.05 *** 0.01) note("Also includes controls for corporate personal minwage benefit_bins and urate_bins. Robust standard errors clustered at state level in parentheses.")
*/

}


{
	
/****************************/
/*	 PART III: 	Figures     */
/****************************/
/*****************************/
/* Figure 1: Newtax line graphs for all states */
/*****************************/
use data/newemployer.dta, clear
drop if sector=="55"

collapse newrate base, by(state year)
gen newtax = newrate*base/100
su newtax, d
bysort year: egen usavg = mean(newtax)
replace newtax = 1500 if newtax>1500
twoway (line newtax year if state==1, lc(gs12) lw(medthick) xlabel(2003(1)2014) ytitle(New rate * tax base ($))) ///
	(line newtax year if state==2, lc(gs12) lw(medthick)) ///
	(line newtax year if state==4, lc(gs12) lw(medthick)) ///
	(line newtax year if state==5, lc(gs12) lw(medthick)) ///
	(line newtax year if state==8, lc(gs12) lw(medthick)) ///
	(line newtax year if state==9, lc(gs12) lw(medthick)) ///
	(line newtax year if state==10, lc(gs12) lw(medthick)) ///
	(line newtax year if state==11, lc(gs12) lw(medthick)) ///
	(line newtax year if state==12, lc(gs12) lw(medthick)) ///
	(line newtax year if state==13, lc(gs12) lw(medthick)) ///
	(line newtax year if state==15, lc(gs12) lw(medthick)) ///
	(line newtax year if state==16, lc(gs12) lw(medthick)) ///
	(line newtax year if state==17, lc(gs12) lw(medthick)) ///
	(line newtax year if state==18, lc(gs12) lw(medthick)) ///
	(line newtax year if state==19, lc(gs12) lw(medthick)) ///
	(line newtax year if state==20, lc(gs12) lw(medthick)) ///
	(line newtax year if state==21, lc(gs12) lw(medthick)) ///
	(line newtax year if state==22, lc(gs12) lw(medthick)) ///
	(line newtax year if state==23, lc(gs12) lw(medthick)) ///
	(line newtax year if state==24, lc(gs12) lw(medthick)) ///
	(line newtax year if state==25, lc(gs12) lw(medthick)) ///
	(line newtax year if state==26, lc(gs12) lw(medthick)) ///
	(line newtax year if state==27, lc(gs12) lw(medthick)) ///
	(line newtax year if state==28, lc(gs12) lw(medthick)) ///
	(line newtax year if state==29, lc(gs12) lw(medthick)) ///
	(line newtax year if state==30, lc(gs12) lw(medthick)) ///
	(line newtax year if state==31, lc(gs12) lw(medthick)) ///
	(line newtax year if state==32, lc(gs12) lw(medthick)) ///
	(line newtax year if state==33, lc(gs12) lw(medthick)) ///
	(line newtax year if state==34, lc(gs12) lw(medthick)) ///
	(line newtax year if state==35, lc(gs12) lw(medthick)) ///
	(line newtax year if state==36, lc(gs12) lw(medthick)) ///
	(line newtax year if state==37, lc(gs12) lw(medthick)) ///
	(line newtax year if state==38, lc(gs12) lw(medthick)) ///
	(line newtax year if state==39, lc(gs12) lw(medthick)) ///
	(line newtax year if state==40, lc(gs12) lw(medthick)) ///
	(line newtax year if state==41, lc(gs12) lw(medthick)) ///
	(line newtax year if state==42, lc(gs12) lw(medthick)) ///
	(line newtax year if state==44, lc(gs12) lw(medthick)) ///
	(line newtax year if state==45, lc(gs12) lw(medthick)) ///
	(line newtax year if state==46, lc(gs12) lw(medthick)) ///
	(line newtax year if state==47, lc(gs12) lw(medthick)) ///
	(line newtax year if state==48, lc(gs12) lw(medthick)) ///
	(line newtax year if state==49, lc(gs12) lw(medthick)) ///
	(line newtax year if state==50, lc(gs12) lw(medthick)) ///
	(line newtax year if state==51, lc(gs12) lw(medthick)) ///
	(line newtax year if state==54, lc(gs12) lw(medthick)) ///
	(line newtax year if state==55, lc(gs12) lw(medthick)) ///
	(line newtax year if state==56, lc(gs12) lw(medthick)) ///
	(line newtax year if state==6, lc(navy) lw(medthick) lp(shortdash)) ///
	(line newtax year if state==53, lc(navy) lw(medthick) lp(dash)) ///
	(line usavg year if state==6, lc(black) lw(thick) xtitle(Year) legend(order(50 "California" 51 "Washington" 52 "US mean")))
graph export figure_1.pdf, replace

/*****************************/
/* Figure 2: Great Recession case study - firm entry */
/*****************************/

	use bds_analysis_sample_2025, clear

	gen treat = 1 if inlist(state,2,15,16,19,27,30,41,44,49,53,56)
	replace treat = 0 if inlist(state,32,34,35,37,38,40) 
	drop if treat==.
	gen newtax = newrate*base/100
	drop if year<=2004 | year>=2014

	** Appendix figure of state variables for GR case study **
	preserve 
	collapse newtax stunemploymentrate benefit entryrate , by(treat year)
	twoway (connected newtax year if treat==0) (connected newtax year if treat==1, msymbol(D)), xline(2010) name(raw, replace) legend(off) xlabel(2005(2)2013) ytitle("New Employer Tax ($)") xtitle("") ylabel(400(200)800)
	twoway (connected benefit year if treat==0) (connected benefit year if treat==1, msymbol(D)), xline(2010) name(raw2, replace) legend(off) xlabel(2005(2)2013) ytitle("UI Benefit ($)") xtitle("")
	twoway (connected stunemp year if treat==0) (connected stunemp year if treat==1, msymbol(D)), xline(2010) name(raw3, replace) legend(order(1 "Control" 2 "Treatment")) xlabel(2005(2)2013) ytitle("Unemp Rate (%)") ylabel(4(2)8)
	graph combine raw raw2 raw3, col(1) xsize(4)
	graph export appendix_figure_a5.pdf, replace
	restore

	gen temp = newtax if year==2008
	bysort n2 state: egen baseline = max(temp)
	gen delta = newtax - baseline
	tab year, su(delta)

	forval i = 2005/2013 {
		gen treat_`i' = treat*(year==`i')
		label var treat_`i' "`i'"
	}
	replace treat_2008 = 0
	
	reghdfe logtax treat_* personalinc corporate log_benefit minwage stunemploymentrate [aw=wgt], a(bbin ubin n2#year n2#state) cluster(cluster)
	//Used for calculation of elasticity in section 5.1: .45 in 2010; .55 in 2011; .49 in 2012; .43 in 201
	
	reghdfe logenter treat_* personalinc corporate log_benefit minwage stunemploymentrate [aw=wgt], a(bbin ubin n2#year n2#state) cluster(cluster)
	coefplot, omitted vertical yline(0) ciopts(recast(rcap)) drop(personalinc corporate log_benefit minwage stunemploymentrate logmax logtax _cons) ytitle("Coefficient on Treatment") xline(5.5, lpattern(dash) lcolor(gs12)) name(a, replace)
	graph export figure_2a.pdf, replace
	
	reghdfe entryrate treat_* personalinc corporate log_benefit minwage stunemploymentrate [aw=wgt], a(bbin ubin n2#year n2#state) cluster(cluster)
	coefplot, omitted vertical yline(0) ciopts(recast(rcap)) drop(personalinc corporate log_benefit minwage stunemploymentrate logmax logtax _cons) /* title("Entry Rate")*/ ytitle("Coefficient on Treatment") xline(5.5, lpattern(dash) lcolor(gs12)) name(b, replace) 
	graph export figure_2b.pdf, replace
	
	// example in paper: restaurants in Washington vs. New Mexico (72)
		tab year if n2 == 72 & state == 53 , sum(newtax) // washington, 368 in 2008 > 650 in 2010 
			di (650.01782 - 368.39017)/ 368.39017 // = 76% increase 
		tab year if n2 == 72 & state == 35, sum(newtax) // new mexico, 393 in 2008 > 416 in 2010 		
			di (416-393)/393 // 6% increase
		
		di .76*-0.04 - 0.06*-0.04
		
/********************************/
/* Figure 3: Broader event study - firm entry, entry employment per firm, survival */
/********************************/

	use bds_analysis_sample_2025, clear

	egen id = group(state n2)

	tsset id year

	gen change_tax = (tax-L1.tax)/L1.tax //% change in newtax relative to year before

	gen flag = change_tax!=. & change_tax>=0.2 //Top 10% of observations; top quarter of increases
	gen year_increase = year if flag == 1
	bysort id: egen first_year_increase = min(year_increase)
	bysort id: egen last_year_increase = max(year_increase)
	gen never_big = first_year_increase==.

	gen ry = year - first_year_increase 
	gl max_ry = 4
	gl min_ry = 4
	drop if ry!=. & (ry<-$min_ry | ry>$max_ry)
	forval k = $min_ry(-1)2 {
		gen g_`k' = ry == -`k'
		lab var g_`k' "-`k'"
	}
	forval k = 0(1)$max_ry {
		gen g`k' = ry == `k'
		lab var g`k' "`k'"
	}

	gen log_emp_perfirm = log(emp_perfirm)
	replace log_emp_perfirm = 0 if emp_perfirm==0
	
	local titlelogenter "Log Number of Firms"
	local titleentryrate "Entry Rate"
	
	local filename1 "appendix_figure_a6"
	local filename2 "figure_3a"
	local filename3 "figure_3b"
	local filename4 "figure_5a"
	local filename5 "figure_6a"
	
	local n = 1
	foreach yvar of varlist tax logtax logenter entryrate exit_age1 log_emp_perfirm {

	eventstudyinteract `yvar' g_* g0-g$max_ry [aw=wgt], cohort(first_year_increase) control_cohort(never_big) covariates(personalinc corporate log_benefit minwage stunemploymentrate) absorb(ubin bbin n2#year id) vce(cluster cluster)

		gen temp = e(sample)

			matrix C = e(b_iw)
			mata st_matrix("A",sqrt(diagonal(st_matrix("e(V_iw)"))))
			matrix C = C \ A'
			matrix list C
		
			// manually add in omitted t-1 
			mata: original_matrix = st_matrix("C")
			mata: new_column = (0 \ .)
				local position = 4 // position where the new column will be inserted
			mata: left_part = original_matrix[,1..`position'-1]
			mata: right_part = original_matrix[,`position'..cols(original_matrix)]
			mata: C2 = left_part, new_column, right_part		
			mata: st_matrix("C2",C2)
			matrix colnames C2 = "-4" "-3" "-2" "-1" "0" "1" "2" "3" "4"
		
			sum `yvar' if temp == 1 [aw=wgt]
				local meano = round(`r(mean)',1)
				local N = `r(N)'
			sum `yvar' if temp==1 & never_big==1 [aw=wgt]
				local meano_c = round(`r(mean)',1)
				local N_c = `r(N)'
			sum `yvar' if temp==1 & never_big==0 [aw=wgt]
				local meano_t = round(`r(mean)',1)
				local N_t = `r(N)'
			drop temp 	
			
			sum change_tax if never_big==1 	
				local meanchange_c = `r(mean)'		
			forval i=0(1)2 {
				sum change_tax if never_big==0 & ry==`i'
					local meanchange_t = `r(mean)'
				local gap`i' = `meanchange_t' - `meanchange_c'
				local boe`i' = round(C[1,$min_ry + `i']/`gap`i'',.01)
			}
			
			coefplot matrix(C2[1]), se(C2[2]) ciopts(recast(rcap)) vertical yline(0) name(change`yvar', replace) xtitle("Years since first tax increase") ytitle("Coefficient on Event Time") /*title("`title`yvar''")*/ // note("N=`N' (N control=`N_c'; N treated=`N_t')." "Mean outcome=`meano' (control=`meano_c', treat=`meano_t')" "BOE implied elasticity: t=0: `boe0'; +1: `boe1'; +2: `boe2'") title("{&Delta}>20% increase: `yvar'")
			
		graph export `filename`n''.pdf, replace
		local n = `n' + 1
	}
	
	// elasticies for draft 
		eventstudyinteract logtax g_* g0-g$max_ry [aw=wgt], cohort(first_year_increase) control_cohort(never_big) covariates(personalinc corporate log_benefit minwage stunemploymentrate) absorb(ubin bbin n2#year id) vce(cluster cluster)
			// g0 0.28
			// g1 0.38
			// g2 0.37
			// g3 0.36
			// g4 0.38
		eventstudyinteract tax g_* g0-g$max_ry [aw=wgt], cohort(first_year_increase) control_cohort(never_big) covariates(personalinc corporate log_benefit minwage stunemploymentrate) absorb(ubin bbin n2#year id) vce(cluster cluster)			
			
		eventstudyinteract logenter g_* g0-g$max_ry [aw=wgt], cohort(first_year_increase) control_cohort(never_big) covariates(personalinc corporate log_benefit minwage stunemploymentrate) absorb(ubin bbin n2#year id) vce(cluster cluster)
		// g0:  di -0.0112841 / .279404 = -0.04
		// g1:  di -0.031179 / .3809321 = -.08184923
		// g2:  di -0.0457725 / .3719122 = -.1230734
		// g3:  di -0.0471112 / .3582951 = -.13148715		
		
		
	// mean exit rates for draft 
	eventstudyinteract exit_age1 g_* g0-g$max_ry [aw=wgt], cohort(first_year_increase) control_cohort(never_big) covariates(personalinc corporate log_benefit minwage stunemploymentrate) absorb(ubin bbin n2#year id) vce(cluster cluster)
		sum exit_age1 if e(sample) [aw=wgt] // 23.55
		sum exit_age1 if e(sample) // 24
		
		// coef: g0 = 1.48 > 1.48 / 23.55 = 6.3%
		
	// coefs for draft 
	eventstudyinteract log_emp_perfirm g_* g0-g$max_ry [aw=wgt], cohort(first_year_increase) control_cohort(never_big) covariates(personalinc corporate log_benefit minwage stunemploymentrate) absorb(ubin bbin n2#year id) vce(cluster cluster)
	
		// g0 = -0.02, g1: -0.025, g2: -0.036, g3: -0.044

		
	**Appendix: Robust to excluding 2010 and 2011 changes**
	drop if first_year_increase==2010 | first_year_increase==2011
	local titletax "New Employer Tax per Worker"
	local titlelogenter "Log Number of Firms"
	local titleentryrate "Entry Rate"
	local titlelog_emp_perfirm "Log Employment per Firm"
	
	foreach yvar of varlist tax logenter entryrate log_emp_perfirm {

	eventstudyinteract `yvar' g_* g0-g$max_ry [aw=wgt], cohort(first_year_increase) control_cohort(never_big) covariates(personalinc corporate log_benefit minwage stunemploymentrate) absorb(ubin bbin n2#year id) vce(cluster cluster)

		gen temp = e(sample)

			matrix C = e(b_iw)
			mata st_matrix("A",sqrt(diagonal(st_matrix("e(V_iw)"))))
			matrix C = C \ A'
			matrix list C
		
			// manually add in omitted t-1 
			mata: original_matrix = st_matrix("C")
			mata: new_column = (0 \ .)
				local position = 4 // position where the new column will be inserted
			mata: left_part = original_matrix[,1..`position'-1]
			mata: right_part = original_matrix[,`position'..cols(original_matrix)]
			mata: C2 = left_part, new_column, right_part		
			mata: st_matrix("C2",C2)
			matrix colnames C2 = "-4" "-3" "-2" "-1" "0" "1" "2" "3" "4"
		
			sum `yvar' if temp == 1 [aw=wgt]
				local meano = round(`r(mean)',1)
				local N = `r(N)'
			sum `yvar' if temp==1 & never_big==1 [aw=wgt]
				local meano_c = round(`r(mean)',1)
				local N_c = `r(N)'
			sum `yvar' if temp==1 & never_big==0 [aw=wgt]
				local meano_t = round(`r(mean)',1)
				local N_t = `r(N)'
			drop temp 	
			
			sum change_tax if never_big==1 	
				local meanchange_c = `r(mean)'		
			forval i=0(1)2 {
				sum change_tax if never_big==0 & ry==`i'
					local meanchange_t = `r(mean)'
				local gap`i' = `meanchange_t' - `meanchange_c'
				local boe`i' = round(C[1,$min_ry + `i']/`gap`i'',.01)
			}
			
			coefplot matrix(C2[1]), se(C2[2]) ciopts(recast(rcap)) vertical yline(0) name(change`yvar', replace) xtitle("Years since first tax increase") ytitle("Coefficient on Event Time") 
		graph export appendix_figure_a8_`yvar'.pdf, replace
	}
	

/**********************/
/* Figure 4: Sectoral Heterogeneity */
/***********************/	
use bds_analysis_sample_2025, clear

foreach x of varlist logenter entryrate {

	reghdfe `x' c.logtax#i.n2 personalinc corporate log_benefit minwage stunemploymentrate [aw=wgt], a(n2#year n2#state ubin bbin) cluster(cluster)

	coefplot, sort xline(0) base drop(_cons personalinc corporate log_benefit minwage stunemploymentrate) /// //vertical
			coeflabels(21.n2#c.logtax = "Mining" 23.n2#c.logtax = "Construction" 31.n2#c.logtax = "Manufacturing" 42.n2#c.logtax = "Wholesale Trade" 44.n2#c.logtax = "Retail Trade" ///
					   48.n2#c.logtax = "Transportation/Warehousing" 51.n2#c.logtax = "Information" 52.n2#c.logtax = "Finance/Insurance" 53.n2#c.logtax = "Real Estate" ///
					   54.n2#c.logtax = "Pro/Sci/Tech Services" 55.n2#c.logtax = "Management" 56.n2#c.logtax = "Admin/Support/Waste Mgmt" 62.n2#c.logtax = "Health" ///
					   71.n2#c.logtax = "Arts/Recreation" 72.n2#c.logtax = "Accommodation/Food" 81.n2#c.logtax = "Other Services") xtitle("Coef (95% CI) on Log(new rate*base) for Sector")
	graph export figure_4_`x'.pdf, replace	
}

/**********************/
/* Figure 5: Survival by Age */
/***********************/
use survival_analysis_sample_2025, clear

foreach x in NEW MAX {
	matrix `x' = J(5,3,.)
	matrix coln `x' = coeff l95 u95
	matrix rown `x' = 1 2 3 4 5
}
est clear
local var = "exitrate"
reghdfe `var' logtaxXage* logmaxXage* personalinc corporate log_benefit minwage stunemploymentrate [aw=wgt], a(n2#year n2#state ubin bbin cohort fage) cluster(cluster)

forval i = 1/5 {
	matrix NEW[`i',1] = _b[logtaxXage`i'], _b[logtaxXage`i']-1.96*_se[logtaxXage`i'], _b[logtaxXage`i']+1.96*_se[logtaxXage`i']
	matrix MAX[`i',1] = _b[logmaxXage`i'], _b[logmaxXage`i']-1.96*_se[logmaxXage`i'], _b[logmaxXage`i']+1.96*_se[logmaxXage`i']
}

coefplot (matrix(NEW[,1]), ci((NEW[,2] NEW[,3])) ciopts(recast(rcap)) offset(-0.1)) (matrix(MAX[,1]), ci((MAX[,2] MAX[,3])) msymbol(D)), yline(0) ciopts(recast(rcap)) offset(0.1) vertical ytitle("Coefficient (one s.d.)") legend(label(2 "log(new rate*base)") label(4 "log(max rate*base)")) name(a, replace) xtitle("Firm Age")
graph export figure_5b.pdf, replace 


/**********************/
/* Figure 6, Employment by Age */
/***********************/

** The lbd_age1to5 dataset is confidential and can only be accessed through a Census RDC **
/*
use lbd_age1to5, clear
keep if age>0
keep if year>=2003

// Keep relevant states, industries
destring state, replace
drop if state>56
drop if inlist(naics2,"11","22","55","61","92") // Agriculture, Utilities, Educ, and Public Admin

// Merge in tax data
destring naics2, gen(n2)
merge m:1 n2 state year using newemployers, keepusing(base newrate maxrate benefit minwage personalinc corporate urate logtax_norm logmax_norm) 
	keep if _merge==3
	drop _merge
gen logtax = log(base*newrate/100)
gen logmax = log(base*maxrate/100)
gen log_benefit = log(benefit)

// Restrict to balanced panel (survives until age 5)
bysort firmid state n2: egen bal = count(year)
gen cohort = year - age + 1
egen id = concat(firmid state n2)
keep if bal==5

// Generate controls 
global tcs corporate personalinc minwage log_benefit urate 
forval i = 1/5 {
	gen logtaxXage`i' = logtax*(age==`i')
	gen logmaxXage`i' = logmax*(age==`i')
	gen logtax_normXage`i' = logtax_norm*(age==`i')
	gen logmax_normXage`i' = logmax_norm*(age==`i')
}

xtile urate_bins = urate, nquant(20)
xtile benefit_bins = benefit, nquant(10)
egen cluster = concat(n2 state)

eststo clear 
	eststo: reghdfe log_emp logtaxXage* logmaxXage* $tcs,  a(urate_bins benefit_bins cohort age n2#state n2#year) cluster(cluster)
	
	forval age = 1/5 {
		su emp if e(sample)==1 & age==`age'
		estadd scalar age`age'_mean = r(mean)
	}
	su log_emp if e(sample)==1
	estadd scalar ymean = r(mean)
	estadd scalar y_sd = r(sd)
	su logtax if e(sample)==1
	estadd scalar xmean = r(mean)
	estadd scalar x_sd = r(sd)
	su logmax if e(sample)==1
	estadd scalar xmean2 = r(mean)
	estadd scalar x_sd2 = r(sd)
	estadd local Obs = round(e(N),10^(floor(log10(e(N)))-3))

	eststo: reghdfe log_emp logtaxXage* logmaxXage* $tcs,  a(urate_bins benefit_bins id age n2#state n2#year) cluster(cluster)
	estadd local firm "x"
	estadd local Obs = round(e(N),10^(floor(log10(e(N)))-3))
	
	eststo: reghdfe log_emp logtax_normXage* logmax_normXage* $tcs,  a(urate_bins benefit_bins cohort age n2#state n2#year) cluster(cluster)
	su logtax_norm if e(sample)==1
	estadd scalar xmean = r(mean)
	estadd scalar x_sd = r(sd)
	su logmax_norm if e(sample)==1
	estadd scalar xmean2 = r(mean)
	estadd scalar x_sd2 = r(sd)
	estadd local Obs = round(e(N),10^(floor(log10(e(N)))-3))

	eststo: reghdfe log_emp logtax_normXage* logmax_normXage* $tcs,  a(urate_bins benefit_bins id age n2#state n2#year) cluster(cluster)
	estadd local firm "x"
	estadd local Obs = round(e(N),10^(floor(log10(e(N)))-3))

esttab using figure_6a.csv, replace r2 se noobs drop(_cons) stats(Obs ymean y_sd xmean x_sd xmean2 x_sd2 age1_mean age2_mean age3_mean age4_mean age5_mean firm, label("N" "Y Mean" "Y SD" "logtax Mean" "logtax SD" "logmax Mean" "logmax SD" "Age 1 Mean" "Age 2 Mean" "Age 3 Mean" "Age 4 Mean" "Age 5 Mean" "FirmFEs")) star(* 0.10 ** 0.05 *** 0.01) note("Includes urate_bins benefit_bins age cohort N2-by-year and N2-by-state FE's. Robust standard errors clustered at N2-by-state level in parentheses.")
*/

** Graph is generated from estimates disclosed from RDC **
clear all
matrix NEW = J(5,3,.)
matrix coln NEW = coeff l95 u95
matrix rown NEW = 1 2 3 4 5
matrix NEW[1,1] = -0.0153,	-0.0153-1.96*0.00275, -0.0153+1.96*0.00275
matrix NEW[2,1] = -0.0123,	-0.0123-1.96*0.00249, -0.0123+1.96*0.00249
matrix NEW[3,1] = -0.00966, -0.00966-1.96*0.00245, -0.00966+1.96*0.00245
matrix NEW[4,1] = -0.00794,	-0.00794-1.96*0.0023, -0.00794+1.96*0.0023
matrix NEW[5,1] = -0.00736,	-0.00736-1.96*0.00247, -0.00736+1.96*0.00247

matrix MAX = J(5,3,.)
matrix coln MAX = coeff l95 u95
matrix rown MAX = 1 2 3 4 5
matrix MAX[1,1] = 0.000807,	0.000807-1.96*0.00279, 0.000807+1.96*0.00279
matrix MAX[2,1] = -0.000637,	-0.000637-1.96*0.00252, -0.000637+1.96*0.00252
matrix MAX[3,1] = -0.00158,	-0.00158-1.96*0.00245, -0.00158+1.96*0.00245
matrix MAX[4,1] = -0.000607, -0.000607-1.96*0.00224, -0.000607+1.96*0.00224
matrix MAX[5,1] = 0.00148,	0.00148-1.96*0.00231,  0.00148+1.96*0.00231

coefplot (matrix(NEW[,1]), ci((NEW[,2] NEW[,3])) ciopts(recast(rcap)) offset(-0.1)) (matrix(MAX[,1]), ci((MAX[,2] MAX[,3])) msymbol(D)), yline(0) ciopts(recast(rcap)) offset(0.1) vertical ytitle("Coefficient (one s.d.)") legend(label(2 "log(new rate*base)") label(4 "log(max rate*base)")) xtitle("Firm Age") ylabel(-0.02(.01)0.01)
graph export figure_6b.pdf, replace 
}


/******************************/
/* PART IV: Appendix Tables and Figures  */
/******************************/

/*****************************/
/* Table A.2: UI tax costs by sector */
/*****************************/
use data/2011.q1-q4.singlefile.dta, clear
keep if aggl==14
collapse (sum) qtrly_c taxable_q total_q, by(ind)

rename industry sector
gen rate = 100*qtrly_c/taxable_q 
gen effrate = 100*qtrly_c/total_q 

replace sector = "Mining" if sector=="21"
replace sector = "Construction" if sector=="23"
replace sector = "Manufacturing" if sector=="31-33"
replace sector = "Wholesale Trade" if sector=="42"
replace sector = "Retail Trade" if sector=="44-45"
replace sector = "Transportation/Warehousing" if sector=="48-49"
replace sector = "Information" if sector=="51"
replace sector = "Finance/Insurance" if sector=="52"
replace sector = "Real Estate" if sector=="53"
replace sector = "Prof/Sci/Tech Services" if sector=="54"
replace sector = "Admin/Support/Waste Mgmt" if sector=="56"
replace sector = "Health" if sector=="62"
replace sector = "Arts/Recreation" if sector=="71"
replace sector = "Accommodation/Food" if sector=="72"
replace sector = "Other Services" if sector=="81"
drop if length(sector)<5

l sector rate effrate

/*****************************/
/* Table A.3: Already generated under Table 2 above*/
/*****************************/

/*****************************/
/* Table A.4: BFS Registrations */
/*****************************/
use bfs_analysis_sample_2025, clear

global cs personalinc corporate log_benefit minwage stunemploymentrate  
lab var logenter "Log(New Firms)"
lab var log_apps "Log(Applications)"
lab var log_apps_likely_payroll "Log(Applications with Likely Payroll)"

// Set sample
reghdfe logenter logtax $cs [aw=wgt], a(year state ubin bbin) vce(robust)
keep if e(sample)==1

est clear
foreach x of varlist logenter log_apps log_apps_likely_payroll  {
		
		eststo: reghdfe `x' logbase $cs [aw=wgt], a(year state ubin bbin) vce(robust)
		summ `x' [aw=wgt] if e(sample)==1
		estadd scalar ymean = r(mean)
		estadd local state "X"
		estadd local year "X"
		estadd local bins "X"
	
}
esttab using appendix_table_a4.tex, replace b(%9.3f) se  ///
	coeflabel(logbase "Log(base)" personalinc "Personal inc. tax rate" corporate "Corporate tax rate" log_benefit "Log(UI benefits) (\textdollar)" minwage "Minimum wage (\textdollar)" stunemploymentrate "State unemp. rate") star(* 0.10 ** 0.05 *** 0.01) drop(_cons)  ///
	stats(r2 ymean year state bins N, label("R$^2$" "Mean Outcome" "Year FEs" "State FEs" "Unemp. and Benefit Bin FEs"  "N") fmt(%9.3f %9.3f 0 0 0)) nomtitles ///
		unstack nonote noobs  fragment label  nobaselevels   nomtitle postfoot(\bottomrule) order(logbase)


/*****************************/
/* Table A.5: Robustness check of base and average wages */
/*****************************/
use bds_analysis_sample_2025, clear

merge m:1 state sector using data/avgwage 
keep if _merge==3
drop _merge 

**Adjust back to non-inflation adjusted dollars
merge m:1 year using data/cpi
keep if _merge==3
drop _merge
replace avg = avg/(108.547/cpi) 

lab var logenter "Log(New Firms)"
lab var entryrate "Entry Rate"

sum base, d // median = 10,500
gen basebin = base<=10500 
gen logtax2 = log((newrate*min(base,avg_wkly_wage*52))/100)
gen flag = logtax!=logtax2
tab sector, su(flag)

est clear
foreach x of varlist logenter entryrate {
	eststo: reghdfe `x' logtax2 personalinc corporate log_benefit minwage stunemploymentrate [aw=wgt] , a(n2#year n2#state ubin bbin) cluster(cluster)
	summ `x' [aw=wgt] if e(sample)==1
	estadd scalar ymean = r(mean)
	estadd local n2_year "X"
	estadd local n2_state "X"
	estadd local bins "X"
	estadd local weighting "X"	

	forval i = 0(1)1{
	eststo: reghdfe `x' logtax personalinc corporate log_benefit minwage stunemploymentrate [aw=wgt] if basebin==`i', a(n2#year n2#state ubin bbin) cluster(cluster)
	summ `x' [aw=wgt] if e(sample)==1
	estadd scalar ymean = r(mean)
	estadd local n2_year "X"
	estadd local n2_state "X"
	estadd local bins "X"
	estadd local weighting "X"	
	}
}
esttab using appendix_table_a5.tex, replace b(%9.3f) se  ///
	coeflabel(logtax2 "Log(new rate*adjusted base)" logtax "Log(new rate*base)" logbase "Log(base)" personalinc "Personal inc. tax rate" corporate "Corporate tax rate" log_benefit "Log(UI benefits) (\textdollar)" minwage "Minimum wage (\textdollar)" stunemploymentrate "State unemp. rate") star(* 0.10 ** 0.05 *** 0.01) drop(_cons)  ///
	stats(r2 ymean n2_year n2_state bins weighting N, label("R$^2$" "Mean Outcome" "Sector-Year FEs" "Sector-State FEs" "Unemp. and Benefit Bin FEs""Firm Weights" "N") fmt(%9.3f %9.3f 0 0 0 0 %9.0fc)) nomtitles 	unstack nonote noobs fragment label  nobaselevels   nomtitle postfoot(\bottomrule) order(logbase ) 


/*****************************/
/* Table A.6: Robustness check of weighting */
/*****************************/
use bds_analysis_sample_2025, clear

lab var logenter "Log(New Firms)"
lab var entryrate "Entry Rate"

sum wgt, d // total number of firms in sector-state-year combo

count 
	scalar tot = `r(N)'
count if wgt < 500 
	scalar pctl_500 = `r(N)'/tot 
count if wgt < 3429 
	scalar pctl_3429 = `r(N)'/tot 
scalar list

est clear
foreach x of varlist logenter entryrate {
	eststo: reghdfe `x' logtax personalinc corporate log_benefit minwage stunemploymentrate , a(n2#year n2#state ubin bbin) cluster(cluster)
	summ `x' if e(sample)==1
	estadd scalar ymean = r(mean)
	estadd local n2_year "X"
	estadd local n2_state "X"
	estadd local bins "X"
	estadd local weighting ""	
	
	eststo: reghdfe `x' logtax personalinc corporate log_benefit minwage stunemploymentrate if wgt >= 590 , a(n2#year n2#state ubin bbin) cluster(cluster)
	summ `x' if e(sample)==1
	estadd scalar ymean = r(mean)
	estadd local n2_year "X"
	estadd local n2_state "X"
	estadd local bins "X"
	estadd local weighting ""	
	
	eststo: reghdfe `x' logtax personalinc corporate log_benefit minwage stunemploymentrate if wgt >= 3429 , a(n2#year n2#state ubin bbin) cluster(cluster)
	summ `x' if e(sample)==1
	estadd scalar ymean = r(mean)
	estadd local n2_year "X"
	estadd local n2_state "X"
	estadd local bins "X"
	estadd local weighting ""	

}
esttab using appendix_table_a6.tex, replace b(%9.3f) se  ///
	coeflabel(logtax "Log(new rate*base)" logbase "Log(base)" personalinc "Personal inc. tax rate" corporate "Corporate tax rate" log_benefit "Log(UI benefits) (\textdollar)" minwage "Minimum wage (\textdollar)" stunemploymentrate "State unemp. rate") star(* 0.10 ** 0.05 *** 0.01) drop(_cons)  ///
	stats(r2 ymean n2_year n2_state bins weighting N, label("R$^2$" "Mean Outcome" "Sector-Year FEs" "Sector-State FEs" "Unemp. and Benefit Bin FEs""Firm Weights" "N") fmt(%9.3f %9.3f 0 0 0 0 %9.0fc)) nomtitles 	unstack nonote noobs	fragment label  nobaselevels   nomtitle postfoot(\bottomrule) order(logtax ) 


/****************************/
/* Figure A.1 Entry rate of age 0 Firms */
/****************************/
import delimited data/bds2020_st_sec_fa.csv, clear
rename st state
replace firms = "0" if firms=="(D)" | firms=="(X)"
destring firms, replace
collapse (sum) firms, by(fage year)
bysort year: egen tot = sum(firms)
keep if fage=="a) 0" | fage=="b) 1"

gen entryrate = 100*firms/tot // changed from /wgt
su entryrate, d 

twoway (line entryrate year if fage=="a) 0"), ytitle("Firm entry rate (%)") xtitle("Year")
graph export appendix_figure_a1.pdf, replace

/****************************/
/* Figure A.2 Average new employer tax costs by state -- This is created in R */
/****************************/
/*
library(ggplot2)
library(tidyverse)
library(utils)
library(usmap)
library(readstata13)  
library(scales)
uitax <- read.dta13("newemployer.dta")
uitax$fips <- uitax$state
table <- aggregate(data = uitax, newtax ~ fips, FUN = "mean")
adjusted_map <- plot_usmap(data = table, values = "newtax")
adjusted_map + theme(legend.position = c(0.925, 0.175)) + labs(fill = "Per-Capita Tax($)") + scale_fill_distiller(breaks = pretty_breaks(n = 5), trans = "reverse")

*/

/****************************/
/* Figure A.3 Sectoral variation in new employer rates over time (2003-2014) */
/****************************/
use data/newemployer, clear
keep if sector=="44-45" | sector=="23"
keep if inlist(state,8,9,16,25,31,39,47,49,53)

twoway (connected newrate year if sector=="23") (connected newrate year if sector=="44-45",msymbol(T)), by(state, note("")) legend(order(1 "Construction" 2 "Retail Trade")) ytitle("New Employer Rate (%)") xlabel(2003(5)2014)
	graph export appendix_figure_a3.pdf, replace
	
/****************************/
/* Figure A.4 Comparison of new employer rates and industry averages (2011) */
/****************************/
use data/2011.q1-q4.singlefile.dta, clear
keep if aggl==54 & qtr==1
gen indavg = 100*(qtrly_c/taxable_q)
gen fips = substr(area,1,2)
destring fips, replace
merge m:m fips using data/state_abbreviations
keep if _merge==3
drop _merge

rename industry sector
keep if inlist(sector,"23","44-45","56","72")
rename fips state
merge 1:m state year sector using data/newemployer, keepusing(newrate)
keep if _merge==3
drop _merge

replace sector = "Construction" if sector=="23"
replace sector = "Retail Trade" if sector=="44-45"
replace sector = "Admin/Support/Waste Mgmt" if sector=="56"
replace sector = "Accommodation/Food" if sector=="72"

gen diagonal = _n/20
twoway (scatter newrate indavg, mlabel(State_Abbrev) by(sector, note("") legend(off))) (line diagonal diagonal), xtitle("Industry Average Rate (%)") ytitle("New Employer Rate (%)")
graph export appendix_figure_a4.pdf, replace

/****************************/
/* Figure A.5 Already generated under Figure 2 above */
/****************************/

/****************************/
/* Figure A.6 Already generated under Figure 3 above */
/****************************/

/****************************/
/* Figure A.7 Binscatters for Entry */
/****************************/	
use bds_analysis_sample_2025.dta, clear
egen n2_year = concat(n2 year) 
egen n2_state = concat(n2 state)
destring n2_state n2_year, replace

label var logenter "Log(Number of New Firms)"
label var entryrate "Entry Rate"
label var logtax "Log(new rate*base)"

foreach x of varlist logenter entryrate {

	binscatter2 `x' logtax [aw=wgt], absorb(bbin ubin n2_state n2_year) control( personalinc corporate log_benefit minwage stunemploymentrate) altcontrols ///
		xtitle("Log(new rate*base)") ytitle(`: variable label `x'')

	graph export appendix_figure_a7_`x'.pdf, replace
}

/****************************/
/* Figure A.8 Already generated under Figure 3 above */
/****************************/

/****************************/
/* Figure A.9 Exit - Benchmarking to other state variables */
/****************************/
use survival_analysis_sample_2025, clear

foreach x in NEW MAX CORP PERSONAL MWAGE UR {
	matrix `x' = J(5,3,.)
	matrix coln `x' = coeff l95 u95
	matrix rown `x' = 1 2 3 4 5
}

local var = "exitrate"
	eststo: reghdfe `var' logtaxXage* logmaxXage* corpXage* personalXage* log_benefit minwage stunemploymentrate [aw=wgt], a(n2#year n2#state ubin bbin cohort fage) cluster(cluster)
	summ `var' [aw=wgt] if e(sample)==1
	estadd scalar ymean = r(mean)
	estadd local n2_state "X"
	estadd local cohort "X"
	estadd local weighting "X"
forval i = 1/5 {
	matrix NEW[`i',1] = _b[logtaxXage`i'], _b[logtaxXage`i']-1.96*_se[logtaxXage`i'], _b[logtaxXage`i']+1.96*_se[logtaxXage`i']
	matrix MAX[`i',1] = _b[logmaxXage`i'], _b[logmaxXage`i']-1.96*_se[logmaxXage`i'], _b[logmaxXage`i']+1.96*_se[logmaxXage`i']
	matrix CORP[`i',1] = _b[corpXage`i'], _b[corpXage`i']-1.96*_se[corpXage`i'], _b[corpXage`i']+1.96*_se[corpXage`i']
	matrix PERSONAL[`i',1] = _b[personalXage`i'], _b[personalXage`i']-1.96*_se[personalXage`i'], _b[personalXage`i']+1.96*_se[personalXage`i']
}

coefplot (matrix(NEW[,1]), ci((NEW[,2] NEW[,3])) ciopts(recast(rcap)) offset(-0.1)) (matrix(MAX[,1]), ci((MAX[,2] MAX[,3])) msymbol(D) ciopts(recast(rcap))) (matrix(CORP[,1]), ci((CORP[,2] CORP[,3])) msymbol(T) mcolor(gs12) ciopts(color(gs12) recast(rcap)) offset(0.1)) (matrix(PERSONAL[,1]), ci((PERSONAL[,2] PERSONAL[,3])) msymbol(S) mcolor(gs12) ciopts(color(gs12) recast(rcap)) offset(0.15)), yline(0) vertical ytitle("Coefficient (one s.d.)") legend(label(2 "log(new rate*base)") label(4 "log(max rate*base)") label(6 "corporate inc") label(8 "personal inc")) xtitle("Firm Age")
graph export appendix_figure_a9a.pdf, replace 

	eststo: reghdfe `var' logtaxXage* logmaxXage* mwageXage* URXage* corporate personalinc log_benefit [aw=wgt], a(n2#year n2#state ubin bbin cohort fage) cluster(cluster)
	summ `var' [aw=wgt] if e(sample)==1
	estadd scalar ymean = r(mean)
	estadd local n2_state "X"
	estadd local cohort "X"
	estadd local weighting "X"
forval i = 1/5 {
	matrix NEW[`i',1] = _b[logtaxXage`i'], _b[logtaxXage`i']-1.96*_se[logtaxXage`i'], _b[logtaxXage`i']+1.96*_se[logtaxXage`i']
	matrix MAX[`i',1] = _b[logmaxXage`i'], _b[logmaxXage`i']-1.96*_se[logmaxXage`i'], _b[logmaxXage`i']+1.96*_se[logmaxXage`i']
	matrix MWAGE[`i',1] = _b[mwageXage`i'], _b[mwageXage`i']-1.96*_se[mwageXage`i'], _b[mwageXage`i']+1.96*_se[mwageXage`i']
	matrix UR[`i',1] = _b[URXage`i'], _b[URXage`i']-1.96*_se[URXage`i'], _b[URXage`i']+1.96*_se[URXage`i']
}

coefplot (matrix(NEW[,1]), ci((NEW[,2] NEW[,3])) ciopts(recast(rcap)) offset(-0.1)) (matrix(MAX[,1]), ci((MAX[,2] MAX[,3])) msymbol(D) ciopts(recast(rcap))) (matrix(MWAGE[,1]), ci((MWAGE[,2] MWAGE[,3])) msymbol(T) mcolor(gs12) ciopts(color(gs12) recast(rcap)) offset(0.1)) (matrix(UR[,1]), ci((UR[,2] UR[,3])) msymbol(S) mcolor(gs12) ciopts(color(gs12) recast(rcap)) offset(0.15)), yline(0) vertical ytitle("Coefficient (one s.d.)") legend(label(2 "log(new rate*base)") label(4 "log(max rate*base)") label(6 "minimum wage") label(8 "state unemp rate")) xtitle("Firm Age")
graph export appendix_figure_a9b.pdf, replace 


/****************************/
/* Figure A.10 Exit graph, by time to graduation */
/****************************/

use survival_analysis_sample_2025, clear

foreach x in NEW MAX {
	matrix `x' = J(5,3,.)
	matrix coln `x' = coeff l95 u95
	matrix rown `x' = 1 2 3 4 5
}

gen late = inlist(state,5,11,13,18,19,21,30,32,35,44,47,54,56)
gen mid = inlist(state,12,17,20,22,24,34,37,38,40,42,45,46,53,51)

local var = "exitrate"
reghdfe `var' logtaxXage* logmaxXage* personalinc corporate log_benefit minwage stunemploymentrate [aw=wgt] if late==0 & mid==0, a(n2#year n2#state ubin bbin cohort fage) cluster(cluster)

forval i = 1/5 {
	matrix NEW[`i',1] = _b[logtaxXage`i'], _b[logtaxXage`i']-1.96*_se[logtaxXage`i'], _b[logtaxXage`i']+1.96*_se[logtaxXage`i']
	matrix MAX[`i',1] = _b[logmaxXage`i'], _b[logmaxXage`i']-1.96*_se[logmaxXage`i'], _b[logmaxXage`i']+1.96*_se[logmaxXage`i']
}

coefplot (matrix(NEW[,1]), ci((NEW[,2] NEW[,3])) ciopts(recast(rcap)) offset(-0.1)) (matrix(MAX[,1]), ci((MAX[,2] MAX[,3])) msymbol(D)), yline(0) ciopts(recast(rcap)) offset(0.1) vertical ytitle("Coefficient (one s.d.)") legend(label(2 "log(new rate*base)") label(4 "log(max rate*base)")) name(a, replace) xtitle("Firm Age") ylabel(-1(0.5)1.5) 
graph export appendix_figure_a10a.pdf, replace 

reghdfe `var' logtaxXage* logmaxXage* personalinc corporate log_benefit minwage stunemploymentrate [aw=wgt] if late==1 | mid==1, a(n2#year n2#state ubin bbin cohort fage) cluster(cluster)

forval i = 1/5 {
	matrix NEW[`i',1] = _b[logtaxXage`i'], _b[logtaxXage`i']-1.96*_se[logtaxXage`i'], _b[logtaxXage`i']+1.96*_se[logtaxXage`i']
	matrix MAX[`i',1] = _b[logmaxXage`i'], _b[logmaxXage`i']-1.96*_se[logmaxXage`i'], _b[logmaxXage`i']+1.96*_se[logmaxXage`i']
}

coefplot (matrix(NEW[,1]), ci((NEW[,2] NEW[,3])) ciopts(recast(rcap)) offset(-0.1)) (matrix(MAX[,1]), ci((MAX[,2] MAX[,3])) msymbol(D)), yline(0) ciopts(recast(rcap)) offset(0.1) vertical ytitle("Coefficient (one s.d.)") legend(label(2 "log(new rate*base)") label(4 "log(max rate*base)")) name(b, replace) xtitle("Firm Age") ylabel(-1(0.5)1.5)
graph export appendix_figure_a10b.pdf, replace 


